{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
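    "# Applying Winsorization\n",
    "\n",
    "Winsorization caps extreme values at chosen limits. In this notebook, each variable is capped at the 5th and 95th percentiles of the train set, first with pandas and then with Feature-engine."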
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "from sklearn.datasets import load_breast_cancer\n",
    "from sklearn.model_selection import train_test_split\n",
    "from feature_engine.outliers import Winsorizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((398, 30), (171, 30))"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# load the breast cancer data as pandas objects (predictors X, target y)\n",
    "X, y = load_breast_cancer(as_frame=True, return_X_y=True)\n",
    "\n",
    "# hold out 30% of the observations as the test set; seed fixed for reproducibility\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)\n",
    "\n",
    "(X_train.shape, X_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# lower limits: 5th percentile of each variable in the train set\n",
    "\n",
    "q05 = X_train.quantile(0.05).to_dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# upper limits: 95th percentile of each variable in the train set\n",
    "\n",
    "q95 = X_train.quantile(0.95).to_dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# cap both sets at the limits learned from the train set\n",
    "train_t, test_t = (\n",
    "    dataset.clip(lower=q05, upper=q95) for dataset in (X_train, X_test)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "min     0.071170\n",
       "max     0.222600\n",
       "mean    0.132529\n",
       "Name: worst smoothness, dtype: float64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# variable whose distribution is inspected before and after capping\n",
    "var = \"worst smoothness\"\n",
    "\n",
    "X_train[var].agg([\"min\", \"max\", \"mean\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "min     0.096053\n",
       "max     0.173215\n",
       "mean    0.132063\n",
       "Name: worst smoothness, dtype: float64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_t[var].agg([\"min\", \"max\", \"mean\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_boxplot_and_hist(data, variable):\n",
    "    \"\"\"Show a boxplot stacked above a histogram of one variable.\n",
    "\n",
    "    Both panels share the x-axis, so the two views of the\n",
    "    distribution line up.\n",
    "\n",
    "    data: DataFrame that contains the column to plot.\n",
    "    variable: name of the column in `data`.\n",
    "    \"\"\"\n",
    "    # the top (boxplot) panel is shorter than the bottom (histogram) panel\n",
    "    panel_heights = {\"height_ratios\": (0.50, 0.85)}\n",
    "    fig, axes = plt.subplots(nrows=2, sharex=True, gridspec_kw=panel_heights)\n",
    "    box_ax, hist_ax = axes\n",
    "\n",
    "    sns.boxplot(x=data[variable], ax=box_ax)\n",
    "    sns.histplot(data=data, x=variable, ax=hist_ax)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVPElEQVR4nO3df7RdZX3n8fcXLhEVhEAyLAiJFyq1ZZqW6MVWHJWa6RqmPxRbCjo1BpWi44/W2prq2DXj6l822ir9MdU0IGlKCwpatDoqxoQyFZEbg9wBhooUQgQlWFRk6o8L3/ljPyHHm9zck3vOPufcPO/XWmdln2f/+ubcuz93n+ec/ezITCRJ9Ths2AVIkgbL4Jekyhj8klQZg1+SKmPwS1JlxoZdQDeWLFmS4+Pjwy5DkhaU7du3P5SZS2e2L4jgHx8fZ3JycthlSNKCEhH37q/drh5JqozBL0mVMfglqTIGvyRVxuCXpMoY/JJUmQXxdU4tDC/9tfN4+JsPDbuM1i0+fgkfvebqYZchzZvBr755+JsP8ciZrx7a/o+++bLB7P/my9rfh9Qiu3okqTIGvyRVxuCXpMoY/JJUGYNfkipj8EtSZQx+SaqMwS9JlTH4JakyBr8kVcbgl6TKGPySVBmD/yCdffbZwy5BOiR5bA2OwS9JlTH4JakyBr8kVcbgl6TKGPySVBmDX5IqY/BLUmUMfkmqjMEvSZUx+CWpMga/JFVmbNgFtGXLli1s3ryZnTt3smLFCtasWcPq1auHXZakLvXzGB61bc21jbbz65AM/i1btrBx40bWrVvHypUrmZqaYv369QCGv7QA9PMYHrVtzbWNQeTXIdnVs3nzZtatW8eqVasYGxtj1apVrFu3js2bNw+7NEld6OcxPGrbmmsbg8ivkT3jj4iLgYsBVqxYcVDr7ty5k5UrV/5I28qVK9m5c2dfanP4WPk70K5+HsOjtq25ttF2fsEIB39mbgA2AExMTOTBrLtixQqmpqZYtWrVE21TU1MH/QdkNtu2bevLdg41NYWhvwP91/n7089jeNS2Ndc22s4vOES7etasWcP69evZsWMH09PT7Nixg/Xr17NmzZphlyapC/08hkdtW3NtYxD5NbJn/L3Y8wHIJZdc8sSn4hdddJEf7EoLRD+P4VHb1lzbGER+ReZB9aIMxcTERE5OTg67DKB5O+rb/P07++yzeeTMVw9t/0fffNlA9n/0zZf5O9ACj63+i4jtmTkxs/2Q7OqRJM3O4Jekyhj8klQZg1+SKmPwS1JlDH5JqozBL0mVMfglqTIGvyRVxuCXpMoY/JJUGYP/IDmWiNQOj63BMfglqTIGvyRVxuCXpMoY/JJUGYNfkipj8EtSZQx+SaqMwS9JlTH4JakyBr8kVcbgl6TKGPySVJmxYRegQ8fi45fAzZcNtYajB7D/xccvaX0fUpsMfvXNR6+5etglSOqCXT2SVBmDX5IqY/BLUmUMfkmqjMEvSZUx+CWpMpGZw65hThGxG7h3yGUsAR4acg0HYn29GfX6YPRrtL7etFHf0zNz6czGBRH8oyAiJjNzYth1zMb6ejPq9cHo12h9vRlkfXb1SFJlDH5JqozB370Nwy5gDtbXm1GvD0a/RuvrzcDqs49fkirjGb8kVcbgl6TKGPySVBmDX5IqY/BLUmUMfkmqjMEvSZUx+CWpMga/JFXG4Jekyhj8klQZg1+SKmPwS1JlxoZdQDeWLFmS4+Pjwy5DkhaU7du3P7S/Wy+2GvwRcSywEfgpIIFXA3cCVwHjwD3A+Zn58IG2Mz4+zuTkZJulStIhJyL2e6/ytrt6LgE+lZk/AfwMcAfwNmBLZp4GbCnPJUkD0lrwR8QxwAuASwEy8weZ+S3gJcCmstgm4Ny2apAk7avNM/5TgN3AByNiR0RsjIinAidk5gNlma8DJ+xv5Yi4OCImI2Jy9+7dLZYpSXVpM/jHgGcBf5mZq4BHmdGtk819
H/d778fM3JCZE5k5sXTpPp9NSJLmqc3g3wXsysybyvOraf4QfCMiTgQo/z7YYg3SnJYtX0FEzOuxbPmKYZcvHbTWvtWTmV+PiPsi4pmZeSewGri9PNYC7yr/XttWDVI37t91Hxd84PPzWveq157V52qk9rX9Pf43AVdExCLgbuBVNO8yPhQRrwHuBc5vuQZJUodWgz8zbwEm9jNrdZv7lSTNziEbJKkyBr8kVcbgl6TKGPySVBmDX5IqY/BLUmUMfo0Mr6CVBmNB3IhFdfAKWmkwPOOXpMoY/JJUGYNfkipj8EtSZQx+SaqMwS9JlTH4JakyBr8kVcYLuHRoOGyMiBh2FdKCYPDr0PD4tFf9Sl2yq0eSKmPwS1JlDH5JqozBL0mVMfglqTIGvyRVxuCXpMoY/JJUGYNffdPLPXO96lYaHK/cVd/0cs9c8ApaaVA845ekyhj8klQZg1+SKtN68EfE4RGxIyL+oTw/JSJuioi7IuKqiFjUdg2SpL0Gccb/28AdHc//CHhvZj4DeBh4zQBqkCQVrQZ/RJwM/BKwsTwP4EXA1WWRTcC5bdYgSfpRbZ/xvw9YBzxenh8PfCszp8vzXcCy/a0YERdHxGRETO7evbvlMiWpHq0Ff0T8MvBgZm6fz/qZuSEzJzJzYunSpX2uTpLq1eYFXM8DXhwRvwgcCTwNuAQ4NiLGyln/ycDXWqxBkjRDa2f8mfn2zDw5M8eBlwGfy8zfALYC55XF1gLXtlWDJGlfw/ge/+8Db4mIu2j6/C8dQg2aRS/j7UhaGAYyVk9mbgO2lem7gecMYr86eL2Mt+NYO9LC0NUZf0Q8r5s2SdLo67ar58+6bJMkjbgDdvVExHOBs4ClEfGWjllPAw5vszBJUjvm6uNfBBxVlju6o/077P1mjiRpATlg8Gfm9cD1EXF5Zt47oJokSS3q9ls9T4qIDcB45zqZ+aI2ipIktafb4P8w8H6awdYea68caYE5bGze1zCcdPJyvnbfzj4XJM2t2+Cfzsy/bLUSaSF6fNrrHrTgdPt1zo9HxOsj4sSIOG7Po9XKJEmt6PaMf235960dbQmc2t9yJElt6yr4M/OUtguRJA1GV8EfEa/cX3tm/nV/y5Ekta3brp4zO6aPBFYDXwIMfklaYLrt6nlT5/OIOBa4so2CJEntmu94/I8C9vtL0gLUbR//x2m+xQPN4Gw/CXyoraIkSe3pto//PR3T08C9mbmrhXokSS3rqqunDNb2f2lG6FwM/KDNoiRJ7en2DlznA18Efh04H7gpIhyWWZIWoG67et4BnJmZDwJExFLgs8DVbRUmSWpHt9/qOWxP6BffPIh1JUkjpNsz/k9FxKeBvyvPLwA+2U5J6tWy5Su4f9d9wy5D0oia6567zwBOyMy3RsSvAv+hzLoRuKLt4jQ/9++6z6GCJc1qrjP+9wFvB8jMjwAfAYiIlWXer7RYmySpBXP105+QmVMzG0vbeCsVSZJaNVfwH3uAeU/uYx2SpAGZK/gnI+I3ZzZGxEXA9nZKkiS1aa4+/jcDH42I32Bv0E8Ai4CXtliXJKklBwz+zPwGcFZE/DzwU6X5E5n5udYrkyS1otvx+LcCW1uuRarLYWNExLxXP+nk5Xztvp19LEi16PYCLkn99vj0vK+3AK+50Py1NuxCRCyPiK0RcXtE3BYRv13aj4uI6yLiK+XfxW3VIEnaV5vj7UwDv5uZpwM/B7whIk4H3gZsyczTgC3luSRpQFoL/sx8IDO/VKYfAe4AlgEvATaVxTYB57ZVw0K2bPkKImJeD0k6kIH08UfEOLAKuInmauAHyqyvAycMooaFxvF2JLWl9aGVI+Io4BrgzZn5nc55mZnsvZfvzPUujojJiJjcvXt322VKUjVaDf6IOIIm9K8og7wBfCMiTizzTwQe3N+6mbkhMycyc2Lp0qVtlilJVWnzWz0BXArckZl/0jHrY8DaMr0WuLatGiRJ+2qzj/95wBpgKiJuKW3/DXgX
8KGIeA1wL809fCVJA9Ja8Gfm/wZm+4rJ6rb2K0k6MO+bK0mVMfglqTIGvyRVxuCXpMoY/JJUGYNfkipj8EtSZQx+SaqMwS9JlTH4JakyBr8kVcbgl6TKGPySVBmDX5IqY/BLUmUM/pYsW76CiJj3Q5La0uYduKp2/677uOADn5/3+le99qw+ViNJe3nGLy1Uh43N+x3l2KIj573usuUrhv0/V48845cWqsen5/2u8qrXntXTulrYPOOXpMoY/JJUGYNf0sHp4bMFPx8YDfbxSzo4PX62oOHzjF/S4PTwbsF3DP3jGb+kwenh3QL4jqFfPOOXpMoY/JJUGYNfkipj8EtSZQz+A+hlhE1JGlV+q+cAehlh028fSBpVnvFLUot66Tlo67qFoZzxR8Q5wCXA4cDGzHzXMOqQpLaNYs/BwM/4I+Jw4C+A/wycDrw8Ik5va3/200uC3rKgl/sXjKJhnPE/B7grM+8GiIgrgZcAt7exs1H8aytp8HrNgkMpRyIzB7vDiPOAczLzovJ8DfCzmfnGGctdDFxcnj4TuHOghe5rCfDQkGs4EOvrzajXB6Nfo/X1po36np6ZS2c2juy3ejJzA7Bh2HXsERGTmTkx7DpmY329GfX6YPRrtL7eDLK+YXyr52vA8o7nJ5c2SdIADCP4bwZOi4hTImIR8DLgY0OoQ5KqNPCunsycjog3Ap+m+TrnZZl526DrmIeR6XaahfX1ZtTrg9Gv0fp6M7D6Bv7hriRpuLxyV5IqY/BLUmWqD/6IOCci7oyIuyLibfuZ/6SIuKrMvykixkv7ERGxKSKmIuKOiHj7EGt8QUR8KSKmy3USnfPWRsRXymPtKNUXEWdExI0RcVtE3BoRF4xSfR3znxYRuyLiz0etvohYERGfKb+Dt+/5/Ryh+taXn+8dEfGn0dKlrF3U+Jby+twaEVsi4ukd80bhGNlvfa0dI5lZ7YPmw+WvAqcCi4AvA6fPWOb1wPvL9MuAq8r0fwGuLNNPAe4BxodU4zjw08BfA+d1tB8H3F3+XVymF49QfT8OnFamTwIeAI4dlfo65l8C/C3w56P08y3ztgG/UKaPAp4yKvUBZwH/VLZxOHAjcPaQXsOf3/PaAP+14zgelWNktvpaOUZqP+N/YviIzPwBsGf4iE4vATaV6auB1eWsJYGnRsQY8GTgB8B3hlFjZt6TmbcCj89Y9z8B12Xmv2bmw8B1wDmjUl9m/nNmfqVM3w88COxzleGw6gOIiGcDJwCf6XNdPdcXzRhXY5l5XVnuu5n5/0alPppj5EiasHsScATwjT7X122NWztemy/QXD8Eo3OM7Le+to6R2oN/GXBfx/NdpW2/y2TmNPBt4HiaPwKP0vwF3gm8JzP/dUg1trFut/qyj4h4Dk1AfLVPde0x7/oi4jDgj4Hf63NNnXp5/X4c+FZEfCQidkTEu6MZBHEk6svMG4GtNMfIA8CnM/OOPtcHB1/ja4D/Nc9156OX+p7Qz2NkZIdsWACeAzxG8/ZrMXBDRHw2y+Bz6l5EnAhsBtZm5j5n3UP0euCTmbmrpa7pXo0BzwdW0Zx8XAVcCFw6xJqeEBHPAH6SvWfX10XE8zPzhiHW9ApgAnjhsGo4kNnq6/cxUvsZfzfDRzyxTOnWOQb4Jk0f/6cy84eZ+SBNX2Yb42z0MsTFIIbH6GkfEfE04BPAOzLzC32uDXqr77nAGyPiHuA9wCsjot/3juilvl3ALaULYRr4e+BZ/S2vp/peCnyhdEF9l+Ys9rl9rg+6rDEi/iPwDuDFmfn9g1l3iPW1c4z080OMhfagOWO6GziFvR+6/PsZy7yBH/1w90Nl+veBD5bpp9IMK/3Tw6ixY9nL2ffD3X+heUeyuEwfN0L1LQK2AG8e5s94tvpmzLuQdj7c7eX1O7wsv7Q8/yDwhhGq7wLgs2UbR5Sf9a8M4zWkeVf0VcoHpR3tI3GMHKC+Vo6Rvv4AFuID+EXgn8uL/o7S9oc0f3Wh+XDqw8Bd
wBeBU0v7UaX9NprQf+sQazyT5uzvUZp3I7d1rPvqUvtdwKtGqT7gFcAPgVs6HmeMSn0ztnEhLQR/H36+vwDcCkzRBO+iUamP5g/TB4A7yjHyJ0M8Rj5L88Hynt+zj43YMbLf+to6RhyyQZIqU3sfvyRVx+CXpMoY/JJUGYNfkipj8EtSZQx+VSMizi3j2wy7jgsj4qSO5/dExJJh1qS6GPw65BxgvJpzgaEHP801ASfNtZDUFoNfIyMi3hoRv1Wm3xsRnyvTL4qIK8r0y6O5B8L/iYg/6lj3uxHxxxHxZeC5EfGujvHN3xMRZwEvBt4dEbdExI/N2Pevl21+OSL+sbRdGBF/HxHXlbPyN5Zx03dExBci4riy3Bnl+a0R8dGIWDxbexmvfgK4otTx5FLCm8qY9lMR8RNl/XdGxGURsS0i7t7z2pR5r4iIL5ZtfCAiDi+Py8v/Yyoifqcs+1sdr8WV/f/JacFp60o6Hz4O9gH8HPDhMn0DzZXSRwD/A3gtzVnyTpphaceAzwHnluUTOL9MHw/cyd57Sh9b/r2c2YdkmAKWzVj+QpqrOY8u+/w28Loy772Uy+hprpx9YZn+Q+B9c7RvAyY69n0P8KYy/XpgY5l+J/B5miGNl9BcFXsEzcBnHweOKMv9T+CVwLNphhhmxv/jfuBJnW0+6n54xq9Rsh14dhmU6vs0N+6YoBmB8gaaoQG2ZebubAYluwJ4QVn3MeCaMv1t4HvApRHxq0A3Y9T/E3B5RPwmzVADe2zNzEcyc3fZ7sdL+xQwHhHH0ITp9aV9E/CC2doPsP+PdLwG4x3tn8jM72fmQzRjsZ8ArKYJ+Zsj4pby/FSa8WBOjYg/i4hz2Ht/iFtp3mG8Apju4rXQIc7g18jIzB/SDJJ1Ic2Z7g00dyZ6Bs14Lwfyvcx8rGxnmmbY7KuBXwY+1cW+Xwf8Ac0oitsj4vgy6/sdiz3e8fxx+jus+Z7tPjZju5373zMvgE2ZeUZ5PDMz35nNjUR+huYdxeuAjWW9XwL+gmbkzpvLKLOqmMGvUXMDzY1P/rFMvw7YkZlJ0/XzwohYUj7AfTlw/cwNRMRRwDGZ+Ungd2jCEOARmm6bfUTEj2XmTZn534Hd/OgwurPKzG8DD0fE80vTGuD62drnqqNLW4DzIuLfldqPi4inl28GHZaZ19D8EXtWuZnM8szcSjOi7DE0AwyqYv7l16i5gWZM8hsz89GI+F5pIzMfiOZG1Vtpzno/kZnX7mcbRwPXRsSRZbm3lPYrgb8qH5Kel5mddzJ6d0ScVpbfQjN07hld1rwWeH9EPIWmu+VVc7RfXtr/jXmMT5+Zt0fEHwCfKcH+Q5rhw/8N+GBpA3g7TbfV35SupwD+NDO/dbD71KHF0TklqTJ29UhSZQx+SaqMwS9JlTH4JakyBr8kVcbgl6TKGPySVJn/D2KTuTSsbCXDAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plot_boxplot_and_hist(X_train, var)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAAsTAAALEwEAmpwYAAATD0lEQVR4nO3dfZBldX3n8fcHRtSsKAMzoWCYsX0gJtQmQR3ciIlRqSRuHpRkCWp8GIwJUkYT466rrqldK1VbpaWJukk2kVUC7rJRAxJ8WpUgQ0xUZEaRiZBEwkIYQRkMPm4UB777xzlTXJue6ab7nr6n+/d+Vd3qc889955P35n59Jlfn/s7qSokSe04bNYBJEmry+KXpMZY/JLUGItfkhpj8UtSYzbMOsBSbNq0qebm5mYdQ5LWlN27d99RVZvnr18TxT83N8euXbtmHUOS1pQkNy+03qEeSWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1Jg1cTqnZucX/90Z3PmVO2YdQyu08ZhNXHLxRbOOoZGw+HVId37lDr5xyq/OOsZMHHn1eevne7/6vFkn0Ig41CNJjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxqz74n/KU54y6wiStCxD9de6L35J0vey+CWpMRa/JDXG4pekxlj8ktQYi1+SGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhqzYdYBDibJ2cDZANu2bVvRazk1s+S/A91rtMVfVecC5wJs3769VvJaO3funEakJlkW64f/DtYe5+OXJE2FxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYdV/8TkUraa0aqr/WffFLkr6XxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDVmw6wDaNw2HrMJrj5v1jFm5sh18r1vPGbTrCNoRCx+HdIlF1806wiSpsyhHklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktSYVNWsMywqyT7g5hntfhNwx4z2vZgxZ4Nx5zPb8o0535izwerne3hVbZ6/ck0U/ywl2VVV22edYyFjzgbjzme25RtzvjFng/Hkc6hHkhpj8UtSYyz+xZ076wCHMOZsMO58Zlu+MecbczYYST7H+CWpMR7xS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNWbDrAMsxaZNm2pubm7WMSRpTdm9e/cdC116cU0U/9zcHLt27Zp1DElaU5IseK1yh3okqTEWvyQ1xuKXpMYMWvxJjkpyUZK/S3J9kicmOTrJZUm+0H/dOGQGSdL3GvqI/63Ah6vqB4EfBa4HXg1cXlUnApf396U1acvWbSSZyW3L1m2z/va1Rg12Vk+ShwFPBs4CqKq7gLuSPBN4Sr/ZBcBO4FVD5ZCGdOveW3jW2z4xk32/+8WnzmS/WvuGPOJ/BLAP+NMkn03y9iT/Cji2qm7rt/kScOxCT05ydpJdSXbt27dvwJiS1JYhi38D8Djgj6vqscC3mDesU1UF1EJPrqpzq2p7VW3fvPk+nz+QJC3TkMW/F9hbVVf19y+i+0Hw5STHAfRfbx8wgyRpnsGKv6q+BNyS5DH9qtOA64D3ATv6dTuAS4fKIEm6r6GnbHgZcGGSI4AbgRfS/bB5T5IXATcDZw6cQZI0YdDir6prgO0LPHTakPuVJB2cn9yVpMZY/JLUmDUxLbO0mC1bt3Hr3ltmHUNaEyx+rQuz+gStn57VWuRQjyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjNMya2qc
E19aGyx+Tc2s5sQH58WX7g+HeiSpMRa/JDXG4pekxlj8ktQYi1+SGrPui3/L1m0kWfXblq3bZv2tS9KC1v3pnLM6xdDTCyWN1bo/4pckfS+LX5IaY/FLUmMGL/4khyf5bJIP9PcfkeSqJDckeXeSI4bOIEm612oc8f8WcP3E/TcAb66qRwN3Ai9ahQzS+nPYBs9Y07IMelZPkhOAnwP+K/CKJAGeBvxKv8kFwOuAPx4yh7Qu3bPfM9a0LEMf8b8F+I/APf39Y4CvVtX+/v5eYMtCT0xydpJdSXbt27dv4JiS1I7Bij/JzwO3V9Xu5Ty/qs6tqu1VtX3z5s1TTidJ7RpyqOdJwDOS/CzwIOChwFuBo5Js6I/6TwC+OGAGSdI8SzriT/KkpaybVFWvqaoTqmoOeDbwsap6LnAFcEa/2Q7g0vuVWJK0Iksd6vmDJa5bilfR/aL3Brox/3cs83UkSctwyKGeJE8ETgU2J3nFxEMPBQ5f6k6qaiews1++EXjC/Q0qSZqOxcb4jwAe0m935MT6r3PvcI2klvSfH5iF40/Yyhdv+aeZ7Hs9OWTxV9WVwJVJzq+qm1cpk6Qxm9HnB8DPEEzLUs/qeWCSc4G5yedU1dOGCKWV2bJ1G7fuvWXWMSSN1FKL/8+BPwHeDtw9XBxNg9cgkHQoSy3+/VXltAqStA4s9XTO9yd5SZLjkhx94DZoMknSIJZ6xL+j//rKiXUFPHK6cSRJQ1tS8VfVI4YOIklaHUsq/iQvWGh9Vb1zunEkSUNb6lDPKRPLDwJOAz4DWPyStMYsdajnZZP3kxwFvGuIQJKkYS13Pv5vAY77S9IatNQx/vfTncUD3eRsPwS8Z6hQ68IM5zORpENZ6hj/myaW9wM3V9XeAfKsH85nImmkljTU00/W9nd0M3RuBO4aMpQkaThLvQLXmcCngV8GzgSuSuK0zJK0Bi11qOe1wClVdTtAks3AXwIXDRVMkjSMpZ7Vc9iB0u995X48V5LWtC1bt5Fk1W9btm4b5PtZ6hH/h5N8BPiz/v6zgA8NkkiSRma9TXW+2DV3Hw0cW1WvTPJLwI/3D30SuHCQRJKkQS12xP8W4DUAVfVe4L0ASX64f+wXBswmSRrAYuP0x1bVnvkr+3VzgySSJA1qseI/6hCPPXiKOSRJq2Sx4t+V5Nfnr0zya8DuYSJJkoa02Bj/y4FLkjyXe4t+O3AE8IsD5pIkDeSQxV9VXwZOTfJU4F/3qz9YVR8bPJkkaRBLnY//CuCKgbNIklaBn76VpMZY/JLUGItfkhozWPEn2ZrkiiTXJfl8kt/q1x+d5LIkX+i/bhwqgyTpvoY84t8P/PuqOgn4MeA3kpwEvBq4vKpOBC7v70uSVslgxV9Vt1XVZ/rlbwDXA1uAZwIX9JtdAJw+VAZJ0n2tyhh/kjngscBVdPP/3NY/9CXg2IM85+wku5Ls2rdv32rElKQmDF78SR4CXAy8vKq+PvlYVRVQCz2vqs6tqu1VtX3z5s1Dx5SkZgxa/EkeQFf6F/bTOgN8Oclx/ePHAbcf7PmSpOkb8qyeAO8Arq+q35946H3Ajn55B3DpUBkkSfe11EsvLseTgOcDe5Jc06/7T8DrgfckeRFwM3DmgBkkSfMMVvxV9ddADvLwaUPtV5J0aH5yV5IaY/FLUmOGHOOXpOk6bAPdeSNaCYtf0tpxz36e9bZPrPpu3/3iU1d9n0NyqEeSGmPxS1JjLH5JaozFL0mNsfglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGmPxS1JjZlL8SZ6e5O+T3JDk1bPIIEmtWvXiT3I48EfAvwVOAp6T5KTVziFJrZrFEf8TgBuq6saqugt4F/DMGeSQpCalqlZ3h8kZwNOr6tf6+88H/k1VvXTedmcDZ/d3HwP8
/aoGvdcm4I4Z7XsxY84G485ntuUbc74xZ4PVz/fwqto8f+WGVQxwv1TVucC5s86RZFdVbZ91joWMORuMO5/Zlm/M+cacDcaTbxZDPV8Etk7cP6FfJ0laBbMo/quBE5M8IskRwLOB980ghyQ1adWHeqpqf5KXAh8BDgfOq6rPr3aO+2Hmw02HMOZsMO58Zlu+MecbczYYSb5V/+WuJGm2/OSuJDXG4pekxjRb/ItNG5HkyUk+k2R//9mDycd2JPlCf9sxwnwfTvLVJB8YU7YkJyf5ZJLPJ7k2ybNGlu/h/fpr+oznjCXbxOMPTbI3yR+OKVuSu/v37Zokg5ysscJ825J8NMn1Sa5LMjeGbEmeOvG+XZPk20lOn2a2BVVVcze6Xyr/I/BI4Ajgc8BJ87aZA34EeCdwxsT6o4Eb+68b++WNY8nXP3Ya8AvAB0b23v0AcGK/fDxwG3DUiPIdATywX34IcBNw/BiyTTz+VuB/A384lvetf+yb0/67NuV8O4Gfmviz/b6xZJvY5mjgn6eZ7WC3Vo/4F502oqpuqqprgXvmPfdngMuq6p+r6k7gMuDpI8pHVV0OfGPKmVacrar+oaq+0C/fCtwO3OdThTPMd1dVfae/+0Cm/z/iFf25Jnk8cCzw0SnnWnG2VbDsfP1cYBuq6rJ+u29W1f8bQ7Z5zgD+z5SzLajV4t8C3DJxf2+/bujnLtVq7GO5ppItyRPojo7+cUq5DlhRviRbk1zbv8Yb+h9QM8+W5DDg94D/MMU8k1b65/qgJLuSfGqgoYqV5PsB4KtJ3pvks0nemG6yyDFkm/Rs4M+mkmgRrRa/ZijJccD/BF5YVbM4ejyoqrqlqn4EeDSwI8mxs87UewnwoaraO+sgB/Hw6qYi+BXgLUkeNetAEzYAP0H3Q/MUuiGZs2YZaL7+38QP032+aXCtFv9Kpo1YjSknxjytxYqyJXko8EHgtVX1qSlngym9d/2R/t/SFca0rCTbE4GXJrkJeBPwgiSvH0k2quqL/dcb6cbTHzvFbLCyfHuBa/qhmP3AXwCPG0m2A84ELqmq704t1SG0WvwrmTbiI8BPJ9mYZCPw00z/p/SYp7VYdrZ++0uAd1bVRSPMd0KSB/fLG4EfZ7qzwi47W1U9t6q2VdUc3ZHrO6tqmhcxWsn7tjHJA/vlTcCTgOummG1F+frnHpXkwO+TnjblfNP49/ocVmmYB2jzrJ7qfoP+s8A/0I0xv7Zf97vAM/rlU+iOFL4FfAX4/MRzfxW4ob+9cIT5Pg7sA/6l3+ZnxpANeB7wXeCaidvJY3nvgJ8CrqU7K+Na4OyxZJv3Gmcx5bN6Vvi+nQrs6d+3PcCLRvhv4sCf7R7gfOCIEWWbo/sfwmFDvG8L3ZyyQZIa0+pQjyQ1y+KXpMZY/JLUGItfkhpj8UtSYyx+NSPJ6f28LbPOcVaS4yfu39Sf/y6tCotf684h5mE5HZh58dOdh3/8YhtJQ7H4NRpJXpnkN/vlNyf5WL/8tCQX9svPSbInyd8mecPEc7+Z5PeSfA54YpLX9/OuX5vkTUlOBZ4BvLGf9/xR8/b9y/1rfi7JX/XrzkryF0ku64/KX5rkFf1EX59KcnS/3cn9/WuTXNJ/6nfB9f1c7NuBC/scD+4jvKyfr31Pkh/sn/+6JOcl2ZnkxgPvTf/Y85J8un+NtyU5vL+d338fe5L8dr/tb068F++a/p+c1pzV+qSYN2+L3YAfA/68X/448GngAcB/AV5Md5T8T3RTOW8APgac3m9fwJn98jF0Uy0c+IDiUf3X8zn4XOh7gC3ztj+L7tPZR/b7/BpwTv/Ym4GX98vXAj/ZL/8u8JZF1u8Etk/s+ybgZf3yS4C398uvAz5BN0X0JrpPfD4A+CHg/cAD+u3+O/AC4PF0U4Yz7/u4lXuvM3DUrP+cvc3+5hG/xmQ38Ph+IrfvAJ+kOzr+CbofBKcAO6tqX3WTbV0IPLl/7t3Axf3y14BvA+9I8kvAUuY3/xvg
/CS/TndhjQOuqKpvVNW+/nXf36/fA8wleRhdmV7Zr78AePLB1h9i/++deA/mJtZ/sKq+U1V30F2/4Fi6C+08Hrg6yTX9/UfSXRTokUn+IMnTga/3r3Et3f8wngfsX8J7oXXO4tdoVDcz4f+lO9L+BF3ZP5VuiuTrF3n6t6vq7v519tNdHOMi4OeBDy9h3+cAv0M3y+LuJMf0D31nYrN7Ju7fQ/e/jmk58Lp3z3vdyf0feCzABVV1cn97TFW9rroLA/0o3f8ozgHe3j/v54A/opuR8uok08ytNcji19h8nG72yb/ql88BPltVRTf085NJNvW/wH0OcOX8F0jyEOBhVfUh4LfpyhC6q5IdudBOkzyqqq6qqv9MN8Hd1oW2m6+qvgbcmeTA9M3PB6482PrFcizR5cAZSb6/z350uusFb6Kb6Otiuh9ij0t3AZetVXUF8CrgYXSXHlTD/Mmvsfk48Frgk1X1rSTf7tdRVbelu5D1FXRHvR+sqksXeI0jgUuTPKjf7hX9+ncB/6P/JekZVTV59a83Jjmx3/5yupkmT15i5h3AnyT5Prrhlhcusv78fv2/0M2zf79U1XVJfgf4aF/s3wV+g2421j/t1wG8hm7Y6n/1Q08B/ltVffX+7lPri7NzSlJjHOqRpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4Jakx/x9gI63ycb9evwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plot_boxplot_and_hist(train_t, var)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Feature-engine"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: black;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-1 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-1 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-1 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: block;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-1 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-1 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-1 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-1 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 1ex;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-1 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Winsorizer(capping_method=&#x27;quantiles&#x27;, fold=0.05, tail=&#x27;both&#x27;,\n",
       "           variables=[&#x27;worst smoothness&#x27;, &#x27;worst texture&#x27;])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;Winsorizer<span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>Winsorizer(capping_method=&#x27;quantiles&#x27;, fold=0.05, tail=&#x27;both&#x27;,\n",
       "           variables=[&#x27;worst smoothness&#x27;, &#x27;worst texture&#x27;])</pre></div> </div></div></div></div>"
      ],
      "text/plain": [
       "Winsorizer(capping_method='quantiles', fold=0.05, tail='both',\n",
       "           variables=['worst smoothness', 'worst texture'])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# cap both tails of the selected variables at the 5th and 95th percentiles\n",
    "\n",
    "capper = Winsorizer(\n",
    "    capping_method=\"quantiles\",\n",
    "    tail=\"both\",\n",
    "    fold=0.05,\n",
    "    variables=[\"worst smoothness\", \"worst texture\"],\n",
    ")\n",
    "\n",
    "# the capping limits are learned from the train set only\n",
    "capper.fit(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'worst smoothness': 0.0960535, 'worst texture': 16.7975}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# lower capping limits learned from the train set\n",
    "\n",
    "capper.left_tail_caps_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'worst smoothness': 0.17321499999999998, 'worst texture': 36.2775}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# upper capping limits learned from the train set\n",
    "\n",
    "capper.right_tail_caps_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# cap the outliers in the train and test sets with the fitted limits\n",
    "train_t = capper.transform(X_train)\n",
    "test_t = capper.transform(X_test)\n",
    "\n",
    "# inspect the extremes of the *transformed* train set; they should match\n",
    "# capper.left_tail_caps_ and capper.right_tail_caps_\n",
    "train_t[capper.variables_].min(), train_t[capper.variables_].max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "fsml",
   "language": "python",
   "name": "fsml"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": "block",
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
